##########################################
########## Topic Analyses ###############

# Data In: "posts.csv"
          
# Data Out: 
# Figures 4a, 4b, 5, 6, A2, A8, A9

###########################################

#Load Packages
library(readr)
library(tidyverse)
library(lubridate)


# Set options for plotting: a very large scipen penalty keeps R from switching
# to scientific notation when formatting numbers.
options(scipen=999999)

# Set working directory to replication folder (Refugee_Info_Replication)

# Open code file from "Refugee_Info_Replication/code/post_type_analysis.R"

# Obtain the full path of the current script in RStudio.
# rstudioapi only works inside a running RStudio session; anywhere else
# (Rscript, a plain R console) leave the path empty instead of erroring so the
# script can still run from a manually set working directory.
script_path <- ""
if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
  script_path <- rstudioapi::getActiveDocumentContext()$path
}

# An unsaved document reports its path as "" rather than NULL, so test for a
# non-empty string before deriving the replication folder from it.
if (length(script_path) == 1 && !is.na(script_path) && nzchar(script_path)) {
  # Calculate the parent directory of the script's directory
  parent_directory <- dirname(dirname(script_path))

  # Set the working directory to the parent directory
  setwd(parent_directory)
} else {
  cat("Script path is not set. Ensure your script is saved and you are running RStudio.")
}
# Check Working Directory
getwd()

# Read in the posts data (the only input file this script uses)
posts <- read_csv("data/posts.csv")


#############
# FIGURE 4a #
#############

# Topic salience by source: for each post type, the share of its posts that
# carry each topic indicator.

topic_salience <- posts %>%
  group_by(post_type) %>%
  summarize(
    denominator = n(),
    asylum = sum(asylum) / denominator,
    employment = sum(employment) / denominator,
    housing = sum(housing) / denominator,
    healthcare = sum(healthcare) / denominator,
    `refugee camp` = sum(camp) / denominator,
    return = sum(return) / denominator,
    `land travel` = sum(land_travel) / denominator,
    `sea travel` = sum(sea_travel) / denominator,
    education = sum(scholarship) / denominator,
    smuggling = sum(smuggling) / denominator
  ) %>%
  # One row per (post type, topic) pair
  pivot_longer(cols = asylum:smuggling) %>%
  # The factor levels fix the left-to-right order of the topics on the x axis
  mutate(
    label = factor(
      name,
      levels = c(
        "asylum", "refugee camp", "employment", "land travel", "sea travel",
        "education", "return", "healthcare", "smuggling", "housing"
      )
    )
  ) %>%
  # Drop every row containing a missing value (e.g. a missing post type)
  na.omit()

figure_4a <- ggplot(topic_salience, aes(x = label, y = value, fill = post_type)) +
  geom_col(position = "dodge", color = "black") +
  scale_fill_grey() +
  labs(y = "Proportion of Posts", x = "Topic") +
  theme_minimal(base_size = 22) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.title = element_blank()
  )
figure_4a
ggsave("plots/Figure_4a.pdf", plot = figure_4a, width = 11, height = 7)


#############
# FIGURE 4b #
#############

# First calculate engagement by topic

# Axis label of each topic -> name of its indicator column in `posts`.
# The order matches the order in which the topics are stacked below.
engagement_topics <- c(
  "refugee camps" = "camp",
  "asylum" = "asylum",
  "employment" = "employment",
  "land travel" = "land_travel",
  "sea travel" = "sea_travel",
  "education" = "scholarship",
  "return" = "return",
  "smuggling" = "smuggling",
  "healthcare" = "healthcare",
  "housing" = "housing"
)

# Average engagement per post, by post type, among posts flagged with a topic.
#
# df          : posts data with `post_type`, `engagement_fb` and the topic column
# topic_col   : name (string) of the topic indicator column
# topic_label : label stored in the `type` column of the result
#
# Returns one row per post type with total engagement (`epm`), number of posts
# (`ppm`), their ratio (`prop`) and the topic label (`type`).
topic_engagement_by_source <- function(df, topic_col, topic_label) {
  df %>%
    group_by(post_type, topic_flag = .data[[topic_col]]) %>%
    summarise(epm = sum(engagement_fb), ppm = n(), .groups = "drop") %>%
    # Drop groups with a missing post type, topic flag or engagement total
    na.omit() %>%
    # Keep only the posts that are about the topic
    filter(topic_flag == TRUE) %>%
    mutate(prop = epm / ppm, type = topic_label) %>%
    select(post_type, epm, ppm, prop, type)
}

# Combine engagement totals
# bind_rows() is used explicitly: every per-topic table has the same columns,
# so nothing depends on how rbind() treats mismatched column names.
engagement_total <- bind_rows(
  Map(
    function(topic_col, topic_label) {
      topic_engagement_by_source(posts, topic_col, topic_label)
    },
    engagement_topics,
    names(engagement_topics)
  )
)

# Plot grouped bar chart

ggplot(engagement_total, aes(fill=post_type, y=prop, x=type)) +
  geom_bar(position="dodge", stat="identity", color="black")+theme_minimal(base_size=22)+
  labs(y = "Average Engagement Per Post", x = "Topic")+
  theme(axis.text.x=element_text(angle=90,hjust=1))+
  scale_fill_grey()+
  theme(legend.title=element_blank())
ggsave("plots/Figure_4b.pdf", width = 11, height = 7)


#############
# FIGURE 5 #
#############

# For each topic, calculate the salience over time and plot

# File-name suffix of each panel -> name of its indicator column in `posts`
salience_topics <- c(
  "asylum" = "asylum",
  "camp" = "camp",
  "employment" = "employment",
  "return" = "return",
  "smuggling" = "smuggling",
  "education" = "scholarship",
  "sea_travel" = "sea_travel",
  "land_travel" = "land_travel",
  "healthcare" = "healthcare",
  "housing" = "housing"
)

# Plot the monthly share of posts about one topic, official vs. unofficial.
#
# df        : posts data with `date`, `post_type` and the topic column
# topic_col : name (string) of the topic indicator column
# file_stub : suffix of the output file, plots/Figure_5_<file_stub>.pdf
#
# Writes the PDF, displays the plot, and invisibly returns the plotted data.
plot_topic_salience <- function(df, topic_col, file_stub) {
  monthly_salience <- df %>%
    group_by(month = floor_date(date, unit = "month"), post_type) %>%
    summarise(prop = sum(.data[[topic_col]]) / n(), .groups = "drop") %>%
    filter(month >= ymd("2013-01-01")) %>%
    # Just Official vs. Unofficial
    filter(post_type != "news")

  salience_plot <- ggplot(
    monthly_salience,
    aes(x = month, y = prop, group = post_type, colour = post_type,
        fill = post_type)
  ) +
    geom_smooth(method = "loess", span = 1, se = FALSE) +
    geom_point() +
    labs(y = "Monthly Proportion of Posts", x = "Date") +
    theme_minimal(base_size = 22) +
    theme(legend.title = element_blank()) +
    scale_color_grey(start = 0, end = .6) +
    scale_fill_grey(start = 0, end = .6)

  # Plots are not auto-printed inside a function, so display explicitly
  print(salience_plot)
  # Pass the plot explicitly instead of relying on last_plot()
  ggsave(
    paste0("plots/Figure_5_", file_stub, ".pdf"),
    plot = salience_plot, width = 11, height = 7
  )
  invisible(monthly_salience)
}

for (file_stub in names(salience_topics)) {
  plot_topic_salience(posts, salience_topics[[file_stub]], file_stub)
}

############
# Figure 6 #
############

# Keep only the official posts (rows with a missing post type are dropped)
official <- posts[which(posts$post_type == "official"), ]

# Topic shares by source type
official[["count"]] <- 1
topic_salience <- official %>%
  group_by(pre_2016_source) %>%
  summarize(
    denominator = n(),
    asylum = sum(asylum) / denominator,
    employment = sum(employment) / denominator,
    housing = sum(housing) / denominator,
    healthcare = sum(healthcare) / denominator,
    `refugee camp` = sum(camp) / denominator,
    return = sum(return) / denominator,
    `land travel` = sum(land_travel) / denominator,
    `sea travel` = sum(sea_travel) / denominator,
    education = sum(scholarship) / denominator,
    smuggling = sum(smuggling) / denominator
  ) %>%
  # One row per (source group, topic) pair
  pivot_longer(cols = asylum:smuggling) %>%
  # The factor levels fix the left-to-right order of the topics on the x axis
  mutate(
    label = factor(
      name,
      levels = c(
        "asylum", "refugee camp", "employment", "land travel", "sea travel",
        "education", "return", "healthcare", "smuggling", "housing"
      )
    )
  ) %>%
  # Drop every row containing a missing value
  na.omit()

figure_6 <- ggplot(
  topic_salience,
  aes(x = label, y = value, fill = pre_2016_source)
) +
  geom_col(position = "dodge", color = "black") +
  scale_fill_grey() +
  labs(y = "Proportion of Posts", x = "Topic") +
  theme_minimal(base_size = 22) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.title = element_blank()
  )
figure_6
ggsave("plots/Figure_6.pdf", plot = figure_6, width = 11, height = 7)

#############
# Figure A2 #
#############

# For each topic, plot engagement per post over time by topic and source

# File-name suffix of each panel -> name of its indicator column in `posts`
engagement_over_time_topics <- c(
  "asylum" = "asylum",
  "camp" = "camp",
  "education" = "scholarship",
  "employment" = "employment",
  "healthcare" = "healthcare",
  "housing" = "housing",
  "land_travel" = "land_travel",
  "sea_travel" = "sea_travel",
  "return" = "return",
  "smuggling" = "smuggling"
)

# Plot average monthly engagement per post for one topic, official vs.
# unofficial.
#
# df        : posts data with `date`, `post_type`, `engagement_fb` and the
#             topic column
# topic_col : name (string) of the topic indicator column
# file_stub : suffix of the output file, plots/Figure_A2_<file_stub>.pdf
#
# Writes the PDF, displays the plot, and invisibly returns the plotted data.
plot_topic_engagement <- function(df, topic_col, file_stub) {
  monthly_engagement <- df %>%
    group_by(
      month = floor_date(date, unit = "month"),
      post_type,
      topic_flag = .data[[topic_col]]
    ) %>%
    summarise(epm = sum(engagement_fb), ppm = n(), .groups = "drop") %>%
    filter(month >= ymd("2013-01-01")) %>%
    # Drop groups with a missing month, post type, flag or engagement total
    na.omit() %>%
    # Keep only posts about the topic, as the Figure 4b calculation does.
    # Without this the panel also plotted the monthly averages of posts that
    # are NOT about the topic, in the same colour.
    filter(topic_flag == TRUE) %>%
    # Just Official vs. Unofficial
    filter(post_type != "news") %>%
    mutate(prop = epm / ppm)

  engagement_plot <- ggplot(
    monthly_engagement,
    aes(x = month, y = prop, color = post_type)
  ) +
    # Smoothing span kept at the value the script has always used
    geom_smooth(method = "loess", span = .05, se = FALSE) +
    geom_point() +
    labs(y = "Average Monthly Engagement Per Post", x = "Date") +
    theme_minimal(base_size = 22) +
    theme(legend.title = element_blank()) +
    scale_color_grey(start = 0, end = .6) +
    scale_fill_grey(start = 0, end = .6)

  # Plots are not auto-printed inside a function, so display explicitly
  print(engagement_plot)
  # Pass the plot explicitly instead of relying on last_plot()
  ggsave(
    paste0("plots/Figure_A2_", file_stub, ".pdf"),
    plot = engagement_plot, width = 11, height = 7
  )
  invisible(monthly_engagement)
}

for (file_stub in names(engagement_over_time_topics)) {
  plot_topic_engagement(
    posts, engagement_over_time_topics[[file_stub]], file_stub
  )
}


#############
# Figure A8 #
#############


# Conduct difference in means tests and create coefficient plot

# List of topics (indicator columns in `posts`)
variables <- c("land_travel", "sea_travel", "smuggling", "asylum", "return", "camp", "employment", "scholarship", "housing", "healthcare")

# Keep official and unofficial posts only. The explicit factor levels pin the
# group order used by t.test(), so every difference below is
# official - unofficial by construction rather than by alphabetical accident.
data <- posts %>%
  select(post_type, all_of(variables)) %>%
  filter(post_type %in% c("official", "unofficial")) %>%
  mutate(post_type = factor(post_type, levels = c("official", "unofficial")))

# Run one two-sample t-test per topic and collect the results in a single
# step (no data frame grown row by row inside a loop).
t_test_results <- bind_rows(lapply(variables, function(var) {
  # Build `<topic> ~ post_type` from the column name
  t_test_result <- t.test(reformulate("post_type", response = var), data = data)

  data.frame(
    Variable = var,
    # official - unofficial
    Estimate = unname(t_test_result$estimate[1] - t_test_result$estimate[2]),
    # Confidence interval of that same difference
    Lower_CI = t_test_result$conf.int[1],
    Upper_CI = t_test_result$conf.int[2],
    stringsAsFactors = FALSE
  )
}))

# Create coefficient plot
ggplot(t_test_results, aes(x = Estimate, y = Variable)) +
  geom_point() +
  geom_errorbarh(aes(xmin = Lower_CI, xmax = Upper_CI), height = 0.2) +
  labs(title = "",
       x = "Difference in Mean Salience (Official - Unofficial)",
       y = "Variable") +
  theme_minimal(base_size=22)
ggsave("plots/Figure_A8.pdf", width=11, height=7)

##############
# Figure A9 #
##############

# Conduct difference in means tests of engagement by topic and create coefficient plot

topic_columns<-variables

# Reshape to one row per (post, topic) pair, keep only the pairs where the
# post is flagged with the topic, and restrict to official/unofficial posts.
# all_of() marks `topic_columns` as an external character vector of column
# names; a bare vector inside a selection is deprecated.
# The explicit factor levels make table() report both source types (with a
# zero count when one is absent) and fix the direction of the difference.
data <- posts %>%
  pivot_longer(cols = all_of(topic_columns), names_to = "topic", values_to = "value") %>%
  filter(value == TRUE) %>%
  mutate(topic = sub("^topic_", "", topic)) %>%
  # Drop the 'value' column (and everything else not needed below)
  select(topic, engagement_fb, post_type) %>%
  filter(post_type %in% c("official", "unofficial")) %>%
  mutate(post_type = factor(post_type, levels = c("official", "unofficial")))

# List of topics to test
topics <- unique(data$topic)

# Empty template so the result has these columns even when no topic is tested
t_test_template <- data.frame(Topic = character(),
                              Source_Type = character(),
                              Estimate = numeric(),
                              Lower_CI = numeric(),
                              Upper_CI = numeric(),
                              stringsAsFactors = FALSE)

# Perform one t-test per topic and collect the results in a single step
t_test_rows <- lapply(topics, function(current_topic) {
  # Subset data for the topic
  topic_data <- filter(data, topic == current_topic)

  # Non-missing engagement observations per source type. Both levels are
  # always present in the table, so a source type with no posts counts as 0
  # and fails the check instead of reaching t.test() and raising an error.
  group_sizes <- table(topic_data$post_type[!is.na(topic_data$engagement_fb)])

  if (all(group_sizes > 1)) {
    # Perform the t-test
    t_test_result <- t.test(engagement_fb ~ post_type, data = topic_data)

    data.frame(
      Topic = current_topic,
      Source_Type = "Official vs Unofficial",
      # official - unofficial
      Estimate = unname(t_test_result$estimate[1] - t_test_result$estimate[2]),
      Lower_CI = t_test_result$conf.int[1],
      Upper_CI = t_test_result$conf.int[2],
      stringsAsFactors = FALSE
    )
  } else {
    # Too few observations in at least one group: keep the topic, no estimate
    data.frame(
      Topic = current_topic,
      Source_Type = "Official vs Unofficial",
      Estimate = NA_real_,
      Lower_CI = NA_real_,
      Upper_CI = NA_real_,
      stringsAsFactors = FALSE
    )
  }
})
t_test_results <- bind_rows(t_test_template, t_test_rows)

# Create coefficient plot using ggplot2
ggplot(t_test_results, aes(x = Estimate, y = Topic)) +
  geom_point() +
  geom_errorbarh(aes(xmin = Lower_CI, xmax = Upper_CI), height = 0.2) +
  labs(title = "",
       x = "Difference in Means Engagement (Official - Unofficial)",
       y = "Topic") +
  theme_minimal(base_size=22)
ggsave("plots/Figure_A9.pdf", width=11, height=7)





